After some researches about meteostat data nearest station in DE that belongs to Kelheim region are:
there are many of them so I am starting to think about extracting all from the Bayern or extract the nearest from longtitude/latitude point with the Kelheim shapefile(using json and Euclid distances)
Kelheim has no weather station, but it could be reconstructed with 2 other
Hohenfels with id: “10775” and Ingolstadt with id:“10860” kelheim_data = {weight1}x{hohenfels} + {weight2}x{inglstadt}
Also this site shows, that there are many of the Kelheim stations in this area, but meteostat doesn’t contain them https://www.wunderground.com/dashboard/pws/IKELHE5
weatherstack_kelheim = read_delim("data/Kelheim_weather_since_july_2008.csv",delim = ",")
## Rows: 120312 Columns: 6
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): description
## dbl (4): hour, precip, visibility, totalsnow_daily
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(weatherstack_kelheim)
## # A tibble: 120,312 × 6
## date hour description precip visibility totalsnow_daily
## <date> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2008-07-01 0 Clear 0 10 0
## 2 2008-07-01 100 Clear 0 10 0
## 3 2008-07-01 200 Clear 0 10 0
## 4 2008-07-01 300 Clear 0 10 0
## 5 2008-07-01 400 Clear 0 10 0
## 6 2008-07-01 500 Clear 0 10 0
## 7 2008-07-01 600 Sunny 0 10 0
## 8 2008-07-01 700 Sunny 0 10 0
## 9 2008-07-01 800 Sunny 0 10 0
## 10 2008-07-01 900 Sunny 0 10 0
## # … with 120,302 more rows
## # ℹ Use `print(n = ...)` to see more rows
What to take as a reffer point isn’t clear because of the date(before/after covid) and weather type (sunny,clear,temperature) Also there is no temperature in it :/
#global_mobility = read_delim("https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv",",")
#de_mobility = global_mobility %>% filter(country_region_code == "DE")
#print(unique(de_mobility$sub_region_1))
As we can see the most precise region to filter data from is Bavaria :/
Relevant data for the , mobility
#bavaria_mobility = de_mobility %>% filter(sub_region_1 == "Bavaria")
#bavaria_mobility = bavaria_mobility %>% #select(country_region,sub_region_1,date,residential_percent_change_from_baseline) %>%
# mutate(residential_percent_change_from_baseline = -residential_percent_change_from_baseline,
# source = "Google")%>%
# rename(BundeslandID = sub_region_1,not_at_home_change = residential_percent_change_from_baseline)
#bavaria_mobility = bavaria_mobility %>% select(date,BundeslandID,not_at_home_change,source)
#Need to filter out weekends
#plt = ggplot(bavaria_mobility)+
# geom_point(aes(x = date,y = not_at_home_change))
#ggplotly(plt)
snz_mobility = read_delim("data/LK_mobilityData_weekdays.csv",";")
## Rows: 50652 Columns: 4
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): Landkreis
## dbl (3): date, outOfHomeDuration, percentageChangeComparedToBeforeCorona
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Kelheim
snz_mobility_kelheim = snz_mobility %>% filter(Landkreis == "Landkreis Kelheim") %>% mutate(source = "senozon") %>% select(-outOfHomeDuration) %>% rename(not_at_home_change = percentageChangeComparedToBeforeCorona)
snz_mobility_kelheim$date = as.Date(strptime(snz_mobility_kelheim$date,"%Y%m%d"))
#Berlin
snz_mobility_berlin = snz_mobility %>% filter(Landkreis == "Berlin") %>% mutate(source = "senozon") %>% select(-outOfHomeDuration) %>% rename(not_at_home_change = percentageChangeComparedToBeforeCorona)
snz_mobility_berlin$date = as.Date(strptime(snz_mobility_berlin$date,"%Y%m%d"))
colors <- c("Berlin" = "blue", "Kelheim" = "red")
plt = ggplot()+
geom_point(data = snz_mobility_kelheim,aes(x = date,y = not_at_home_change,color = "Berlin"))+
geom_point(data = snz_mobility_berlin,aes(x = date,y = not_at_home_change,color = "Kelheim"))+
scale_colour_manual(values = colors)
ggplotly(plt)
We take berlin weather from station in Schoenefeld with id = 10384
## Rows: 24417 Columns: 11
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): X2, X3, X4, X5, X6, X7, X8, X9, X10, X11
## date (1): X1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# think about duration of description column
# Kelheim
weatherstack_kelheim_daily = weatherstack_kelheim %>%
group_by(date) %>%
summarize(description = description,precip_day = sum(precip),visibility_mean = mean(visibility),totalsnow_daily = mean(totalsnow_daily))
## `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
weatherstack_kelheim_weekly = weatherstack_kelheim_daily %>%
mutate(year_week = paste0(isoyear(date),"-",isoweek(date))) %>%
group_by(year_week) %>%
summarize(description = description,date = first(date), precip_week = sum(precip_day),visibility_mean = mean(visibility_mean),totalsnow_weekly =sum( totalsnow_daily))
## `summarise()` has grouped output by 'year_week'. You can override using the `.groups` argument.
weatherstack_kelheim_weekly = unique(weatherstack_kelheim_weekly)
print(weatherstack_kelheim_weekly)
## # A tibble: 8,194 × 6
## # Groups: year_week [717]
## year_week description date precip_week visibility_mean totalsnow_weekly
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 2008-27 Clear 2008-07-01 185. 9.22 0
## 2 2008-27 Sunny 2008-07-01 185. 9.22 0
## 3 2008-27 Partly cloudy 2008-07-01 185. 9.22 0
## 4 2008-27 Patchy light rain 2008-07-01 185. 9.22 0
## 5 2008-27 Moderate rain 2008-07-01 185. 9.22 0
## 6 2008-27 Light drizzle 2008-07-01 185. 9.22 0
## 7 2008-27 Patchy light drizzle 2008-07-01 185. 9.22 0
## 8 2008-27 Mist 2008-07-01 185. 9.22 0
## 9 2008-27 Fog 2008-07-01 185. 9.22 0
## 10 2008-28 Fog 2008-07-07 1289. 8.20 0
## # … with 8,184 more rows
## # ℹ Use `print(n = ...)` to see more rows
#Berlin
berlin_weather_weekly = berlin_weather_daily %>% filter(year(date) >=2020) %>%
mutate(year_week = paste0(isoyear(date),"-",isoweek(date))) %>%
group_by(year_week) %>%
summarize(date = first(date), prcp_week = sum(prcp), tavg= mean(tavg),snow_week =sum( snow),wspd = mean(wspd),tmax = max(tmax)) %>%
arrange(year_week)
print(berlin_weather_weekly)
## # A tibble: 139 × 7
## year_week date prcp_week tavg snow_week wspd tmax
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-1 2020-01-01 10.5 2.24 0 17.5 7.2
## 2 2020-10 2020-03-02 7.6 4.7 0 15.3 10.3
## 3 2020-11 2020-03-09 16 6.6 0 22.5 13.8
## 4 2020-12 2020-03-16 0 7.17 0 14.0 17.3
## 5 2020-13 2020-03-23 0.1 4.54 0 15.8 15.9
## 6 2020-14 2020-03-30 0.1 5.01 0 15.8 16.2
## 7 2020-15 2020-04-06 3.7 12 0 11.0 23.7
## 8 2020-16 2020-04-13 0.1 9.3 0 17.0 18.1
## 9 2020-17 2020-04-20 0 12.0 0 15.1 22.5
## 10 2020-18 2020-04-27 27.3 12.8 0 13.2 24
## # … with 129 more rows
## # ℹ Use `print(n = ...)` to see more rows
#mob_joined = rbind(snz_mobility_kelheim,bavaria_mobility)
#Kelheim
snz_mobility_kelheim_year_week = snz_mobility_kelheim %>%
mutate(year_week = paste0(isoyear(date),"-",isoweek(date))) %>%
group_by(year_week) %>%
summarize(date = first(date),not_at_home_change = mean(not_at_home_change))
mob_joined_with_weather_kelheim = snz_mobility_kelheim_year_week %>% inner_join(weatherstack_kelheim_weekly, by = "year_week") %>% select(-date.y) %>% rename(date = date.x)
print(mob_joined_with_weather_kelheim)
## # A tibble: 1,083 × 7
## year_week date not_at_home_change description precip_week visibility_mean totalsnow_weekly
## <chr> <date> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2020-10 2020-03-06 0 Partly cloudy 653. 8.63 33.6
## 2 2020-10 2020-03-06 0 Light rain 653. 8.63 33.6
## 3 2020-10 2020-03-06 0 Light sleet showers 653. 8.63 33.6
## 4 2020-10 2020-03-06 0 Patchy rain possible 653. 8.63 33.6
## 5 2020-10 2020-03-06 0 Overcast 653. 8.63 33.6
## 6 2020-10 2020-03-06 0 Moderate or heavy sleet 653. 8.63 33.6
## 7 2020-10 2020-03-06 0 Patchy heavy snow 653. 8.63 33.6
## 8 2020-10 2020-03-06 0 Light sleet 653. 8.63 33.6
## 9 2020-10 2020-03-06 0 Moderate rain 653. 8.63 33.6
## 10 2020-10 2020-03-06 0 Mist 653. 8.63 33.6
## # … with 1,073 more rows
## # ℹ Use `print(n = ...)` to see more rows
#Berlin
snz_mobility_berlin_year_week = snz_mobility_berlin %>%
mutate(year_week = paste0(isoyear(date),"-",isoweek(date))) %>%
group_by(year_week) %>%
summarize(date = first(date),not_at_home_change = mean(not_at_home_change))
mob_joined_with_weather_berlin = snz_mobility_berlin_year_week %>% inner_join(berlin_weather_weekly, by = "year_week") %>% select(-date.y) %>% rename(date = date.x)
print(mob_joined_with_weather_berlin)
## # A tibble: 126 × 8
## year_week date not_at_home_change prcp_week tavg snow_week wspd tmax
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-10 2020-03-06 0 7.6 4.7 0 15.3 10.3
## 2 2020-11 2020-03-13 -4 16 6.6 0 22.5 13.8
## 3 2020-12 2020-03-20 -26 0 7.17 0 14.0 17.3
## 4 2020-13 2020-03-27 -39 0.1 4.54 0 15.8 15.9
## 5 2020-14 2020-04-03 -37 0.1 5.01 0 15.8 16.2
## 6 2020-15 2020-04-10 0 3.7 12 0 11.0 23.7
## 7 2020-16 2020-04-17 -32 0.1 9.3 0 17.0 18.1
## 8 2020-17 2020-04-24 -27 0 12.0 0 15.1 22.5
## 9 2020-18 2020-05-01 9 27.3 12.8 0 13.2 24
## 10 2020-19 2020-05-08 22 1.8 12.3 0 12.3 23.7
## # … with 116 more rows
## # ℹ Use `print(n = ...)` to see more rows
#First plot with colour as precipitation
shapes <- c("Berlin" = 5, "Kelheim" = 3)
plt_color = ggplot()+
geom_point(data = mob_joined_with_weather_kelheim,aes(x = date,y = not_at_home_change,colour = precip_week,shape = "Kelheim"))+
#geom_point(data = mob_joined_with_weather_berlin,aes(x = date,y = not_at_home_change,colour = prcp_week,shape = "Berlin"))+
scale_color_gradient2()+
scale_shape_manual(values = shapes)
ggplotly(plt_color)
#Second plot as another line as precipitation
plt_line = ggplot(mob_joined_with_weather_kelheim)+
geom_point(aes(x = date,y = not_at_home_change))+
geom_line(aes(x = date,y = precip_week*0.5,color = "red"))
ggplotly(plt_line)
plt_hist_precip = ggplot(mob_joined_with_weather_kelheim,aes(x = precip_week,y = not_at_home_change))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 2,fill = "blue")
ggplotly(plt_hist_precip)
plt_hist_visibility = ggplot(mob_joined_with_weather_kelheim,aes(x = visibility_mean,y = not_at_home_change))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 0.5,fill = "blue")
ggplotly(plt_hist_visibility)
plt_hist_totalsnow = ggplot(mob_joined_with_weather_kelheim,aes(x = totalsnow_weekly,y = not_at_home_change))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 7,fill = "blue")
ggplotly(plt_hist_totalsnow)
#this is a bad plot because it takes description of 1 day of the week
plt_hist_descr = ggplot(mob_joined_with_weather_kelheim,aes(x = description,y = not_at_home_change))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 5,fill = "blue")+
coord_flip()
ggplotly(plt_hist_descr)
Ingolstadt data from id = 10860 station
ingolstadt_weather = read_delim("https://bulk.meteostat.net/v2/daily/10860.csv.gz",",",col_names = FALSE)
## Rows: 19285 Columns: 11
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): X2, X3, X4, X5, X6, X7, X8, X9, X10
## lgl (1): X11
## date (1): X1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(ingolstadt_weather) = c("date", "tavg", "tmin", "tmax", "prcp", "snow", "wdir", "wspd", "wpgt", "pres", "tsun")
# We don't need data of weather before 2020, because of snz_mobility date, also data isn't precise
ingolstadt_weather = ingolstadt_weather %>% filter(year(date)>=2020)%>% replace_na(list(snow = 0))
print(ingolstadt_weather)
## # A tibble: 967 × 11
## date tavg tmin tmax prcp snow wdir wspd wpgt pres tsun
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 2020-01-01 -0.8 -3.5 3.4 0 0 70 6.5 20.5 1036. NA
## 2 2020-01-02 -2.9 -6.1 -1.2 0 0 243 4 16.6 1033. NA
## 3 2020-01-03 1.2 -2.4 5.9 0.3 0 218 7.2 29.5 1028 NA
## 4 2020-01-04 4.4 3.4 6.2 0.5 0 278 20.5 48.2 1030 NA
## 5 2020-01-05 2.8 -1.8 4.5 0 0 271 9 38.9 1037. NA
## 6 2020-01-06 -1.2 -3.7 4.5 0 0 119 4.3 14.8 1032. NA
## 7 2020-01-07 -0.4 -5 2.6 0.9 0 233 5 24.1 1032. NA
## 8 2020-01-08 1 -2.3 3.2 0 0 100 2.9 13 1032. NA
## 9 2020-01-09 4.3 -0.1 10.8 0 0 109 2.9 11.2 1024. NA
## 10 2020-01-10 4.9 -1.2 11.2 0.2 0 230 9.4 37.1 1023 NA
## # … with 957 more rows
## # ℹ Use `print(n = ...)` to see more rows
Hohenfels data from id = 10775 station
hohenfels_weather = read_delim("https://bulk.meteostat.net/v2/daily/10775.csv.gz",",",col_names = FALSE)
## Rows: 6308 Columns: 11
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (8): X2, X3, X4, X5, X6, X7, X8, X10
## lgl (2): X9, X11
## date (1): X1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(hohenfels_weather) = c("date", "tavg", "tmin", "tmax", "prcp", "snow", "wdir", "wspd", "wpgt", "pres", "tsun")
# We don't need data of weather before 2020, because of snz_mobility date, also data isn't precise
hohenfels_weather = hohenfels_weather %>% filter(year(date)>=2020) %>% replace_na(list(snow = 0))
print(hohenfels_weather)
## # A tibble: 932 × 11
## date tavg tmin tmax prcp snow wdir wspd wpgt pres tsun
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <dbl> <lgl>
## 1 2020-01-01 -4.3 -8.2 1.5 NA 0 NA NA NA NA NA
## 2 2020-01-02 -4.5 -9.7 -0.7 NA 0 355 0.6 NA 1033 NA
## 3 2020-01-03 0.5 -2.5 4.5 NA 0 153 3.7 NA 1028. NA
## 4 2020-01-04 3.4 2.2 5.7 NA 0 264 15.5 NA 1028. NA
## 5 2020-01-05 1.3 -2.1 3.3 NA 0 334 3.7 NA 1036. NA
## 6 2020-01-06 -1.1 -3.6 2.5 NA 0 18 2.1 NA 1032. NA
## 7 2020-01-07 0.2 -3.4 3 NA 0 332 2.6 NA 1031. NA
## 8 2020-01-08 1.5 0.2 2.6 NA 0 84 3.4 NA NA NA
## 9 2020-01-09 4.1 1.5 9.3 NA 0 35 2.3 NA 1024. NA
## 10 2020-01-10 5.1 1.3 9.4 NA 0 323 3.8 NA 1022. NA
## # … with 922 more rows
## # ℹ Use `print(n = ...)` to see more rows
As we can see in Hohenfels data isn’t that accurate and precipitation is data is missing fr year 2020, so for the further analysis we take only Ingolstadt data.
ingolstadt_weather_weekly = ingolstadt_weather %>%
mutate(year_week = paste0(isoyear(date),"-",isoweek(date))) %>%
group_by(year_week) %>%
summarize(date = first(date), prcp_week = sum(prcp), tavg= mean(tavg),snow_week =sum( snow),wspd = mean(wspd),tmax = max(tmax)) %>%
arrange(year_week)
#ingolstadt_weather_weekly = unique(weatherstack_kelheim_weekly)
print(ingolstadt_weather_weekly)
## # A tibble: 139 × 7
## year_week date prcp_week tavg snow_week wspd tmax
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-1 2020-01-01 0.8 0.94 0 9.44 6.2
## 2 2020-10 2020-03-02 13.9 4.14 0 13.2 9.6
## 3 2020-11 2020-03-09 13.1 7.14 0 15.4 17.7
## 4 2020-12 2020-03-16 9.9 7.16 0 9.21 19
## 5 2020-13 2020-03-23 0 4.44 0 14.2 NA
## 6 2020-14 2020-03-30 0 4.61 0 7.86 17.3
## 7 2020-15 2020-04-06 0 12.2 0 5.23 22.4
## 8 2020-16 2020-04-13 3.8 10.9 0 8.23 23.6
## 9 2020-17 2020-04-20 0 12.9 0 14.7 21
## 10 2020-18 2020-04-27 18.6 11.3 0 10.3 20.1
## # … with 129 more rows
## # ℹ Use `print(n = ...)` to see more rows
mob_joined_with_ingolstadt = ingolstadt_weather_weekly %>%
inner_join(snz_mobility_kelheim_year_week, by = "year_week") %>%
select(-date.x) %>%
rename(date = date.y) %>%
replace_na(list(tmax = 0))
print(mob_joined_with_ingolstadt)
## # A tibble: 126 × 8
## year_week prcp_week tavg snow_week wspd tmax date not_at_home_change
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <date> <dbl>
## 1 2020-10 13.9 4.14 0 13.2 9.6 2020-03-06 0
## 2 2020-11 13.1 7.14 0 15.4 17.7 2020-03-13 0
## 3 2020-12 9.9 7.16 0 9.21 19 2020-03-20 -14
## 4 2020-13 0 4.44 0 14.2 0 2020-03-27 -23
## 5 2020-14 0 4.61 0 7.86 17.3 2020-04-03 -20
## 6 2020-15 0 12.2 0 5.23 22.4 2020-04-10 -18
## 7 2020-16 3.8 10.9 0 8.23 23.6 2020-04-17 -19
## 8 2020-17 0 12.9 0 14.7 21 2020-04-24 -17
## 9 2020-18 18.6 11.3 0 10.3 20.1 2020-05-01 -13
## 10 2020-19 2.3 13.3 0 6.73 25.8 2020-05-08 -9
## # … with 116 more rows
## # ℹ Use `print(n = ...)` to see more rows
#First plot with colour as precipitation
fills <- c("Ingolstadt" = "blue", "Berlin" = "red")
plt_ing_color = ggplot(mob_joined_with_ingolstadt)+
geom_point(aes(x = date,y = not_at_home_change,colour = prcp_week,fill = "Ingolstadt"))+
geom_point(data = mob_joined_with_weather_berlin,aes(x = date,y = not_at_home_change,colour = prcp_week,fill = "Berlin"))+
scale_color_gradient(low = "white",high = "black")+
scale_fill_manual(values = fills)
ggplotly(plt_ing_color)
plt_ing_color = ggplot(mob_joined_with_ingolstadt)+
geom_point(aes(x = date,y = not_at_home_change))+
geom_line(aes(x = date,y = prcp_week,color = "Ingolstadt"))+
#geom_line(data = berlin_weather_weekly,aes(x = date,y = prcp_week,color = "Berlin"))+
scale_color_manual(values = fills)+
ggtitle("Kelheim mobility with precipitation as line plot on same axis")
ggplotly(plt_ing_color)
# replace coluumn positioning
mob_joined_with_weather_berlin = mob_joined_with_weather_berlin %>% select(year_week,prcp_week,tavg,snow_week,wspd,tmax,date,not_at_home_change)
mob_joined_with_weather_berlin = mob_joined_with_weather_berlin %>% mutate(landkreis = "Berlin")
mob_joined_with_ingolstadt = mob_joined_with_ingolstadt %>% mutate(landkreis = "Ingolstadt")
mob_joined_with_ing_berlin = rbind(mob_joined_with_ingolstadt,mob_joined_with_weather_berlin)
plt_hist_precip_ing = ggplot(mob_joined_with_ing_berlin,aes(x = prcp_week,y = not_at_home_change,fill = landkreis))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 2,position = position_dodge())
ggplotly(plt_hist_precip_ing)
plt_hist_precip_ing = ggplot(mob_joined_with_ing_berlin,aes(x = tavg,y = not_at_home_change,fill = landkreis))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 2,position = position_dodge2())
ggplotly(plt_hist_precip_ing)
plt_hist_precip_ing = ggplot(mob_joined_with_ing_berlin,aes(x = tmax,y = not_at_home_change,fill = landkreis))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 2)
ggplotly(plt_hist_precip_ing)
plt_hist_precip_ing = ggplot(mob_joined_with_ing_berlin,aes(x = snow_week,y = not_at_home_change,fill = landkreis))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 5)
ggplotly(plt_hist_precip_ing)
## Warning: Removed 1 rows containing non-finite values (stat_summary_bin).
After first look at data, we can assume that hours out of home strongly depend on average temperature outside, that sounds logical. Mb categorization seasons of the data will help to understand this function
mob_joined_with_ing_berlin = mob_joined_with_ing_berlin %>%
mutate(season = ifelse(month(date) %in% c(12,1,2),"winter",NA)) %>%
mutate(season = ifelse(month(date) %in% c(3,4,5),"spring",season)) %>%
mutate(season = ifelse(month(date) %in% c(6,7,8),"summer",season)) %>%
mutate(season = ifelse(month(date) %in% c(9,10,11),"autumn",season))
#insert also a data about overall in germany
plt_hist_season = ggplot(mob_joined_with_ing_berlin,aes(x = season,y = not_at_home_change,fill = landkreis))+
stat_summary_bin(fun = "mean",
geom = "bar",
binwidth = 5,position = position_dodge())
ggplotly(plt_hist_season)
So it seems that season has an enormous impact at mobility of citizens. Another important parameter can be description of the weather based on Kelheim statistics, we will merge it into Ingolstadt weather, because of the assumption, that Ingolstadt ad Kelheim have the similar weather properties.
type_of_weather = unique(weatherstack_kelheim$description)
weatherstack_kelheim_year_week = weatherstack_kelheim %>% mutate(year_week = paste0(isoyear(date),"-",isoweek(date)))
week_description_impact = weatherstack_kelheim_year_week %>% group_by(year_week) %>% count(description)
week_description_impact = week_description_impact %>% pivot_wider(names_from = description,values_from = n)
#remove NAs
week_description_impact[is.na(week_description_impact)] = 0
print(week_description_impact)
## # A tibble: 717 × 46
## # Groups: year_week [717]
## year_week Clear Fog Light dri…¹ Mist Moder…² Partl…³ Patch…⁴ Patch…⁵ Sunny Cloudy Heavy…⁶ Heavy…⁷ Light…⁸ Light…⁹
## <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 2008-27 50 1 6 3 3 21 3 6 51 0 0 0 0 0
## 2 2008-28 22 5 6 6 3 34 12 9 14 3 3 3 3 12
## 3 2008-29 30 6 3 12 3 44 9 6 19 9 0 0 3 9
## 4 2008-30 27 0 3 9 0 54 9 0 33 18 0 0 3 0
## 5 2008-31 60 0 0 3 0 24 3 6 51 3 0 0 0 3
## 6 2008-32 54 3 3 0 3 27 0 0 48 3 0 0 0 12
## 7 2008-33 29 0 6 9 9 39 12 9 37 6 0 0 0 0
## 8 2008-34 29 0 0 6 6 54 0 3 43 0 0 3 0 12
## 9 2008-35 62 0 0 12 0 16 0 3 66 6 0 0 3 0
## 10 2008-36 26 6 6 19 0 17 0 12 34 0 0 0 9 6
## # … with 707 more rows, 31 more variables: `Moderate or heavy rain shower` <int>,
## # `Patchy light rain with thunder` <int>, `Patchy rain possible` <int>, `Moderate rain at times` <int>,
## # Overcast <int>, `Thundery outbreaks possible` <int>, `Light sleet` <int>, `Moderate or heavy sleet` <int>,
## # `Patchy heavy snow` <int>, `Freezing fog` <int>, Blizzard <int>, `Heavy snow` <int>, `Light snow` <int>,
## # `Moderate or heavy rain with thunder` <int>, `Moderate or heavy snow showers` <int>,
## # `Moderate or heavy snow with thunder` <int>, `Moderate snow` <int>, `Patchy light snow` <int>,
## # `Patchy moderate snow` <int>, `Moderate or heavy freezing rain` <int>, `Light sleet showers` <int>, …
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
mob_joined_with_ingolstadt_description = mob_joined_with_ingolstadt %>% inner_join(week_description_impact, by = "year_week")
#normalize it to a percentage
mob_joined_with_ingolstadt_description[type_of_weather] = mob_joined_with_ingolstadt_description[type_of_weather]/168 #168 hours a week
#Assumption not_at_home_change is calculated through individual weather impact normally
#=> each type of weather for the weak get its own weather impact based on not_at_home
mob_joined_with_ingolstadt_description[type_of_weather] = mob_joined_with_ingolstadt_description[type_of_weather]*mob_joined_with_ingolstadt_description$not_at_home_change
print(mob_joined_with_ingolstadt_description)
## # A tibble: 107 × 54
## year_week prcp_week tavg snow_…¹ wspd tmax date not_a…² landk…³ Clear Fog Light…⁴ Mist Moder…⁵ Partl…⁶
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <date> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-10 13.9 4.14 0 13.2 9.6 2020-03-06 0 Ingols… 0 0 0 0 0 0
## 2 2020-11 13.1 7.14 0 15.4 17.7 2020-03-13 0 Ingols… 0 0 0 0 0 0
## 3 2020-12 9.9 7.16 0 9.21 19 2020-03-20 -14 Ingols… -0.333 -0.5 0 -0.5 -0.25 -8
## 4 2020-13 0 4.44 0 14.2 0 2020-03-27 -23 Ingols… -3.42 0 0 0 0 -13.6
## 5 2020-14 0 4.61 0 7.86 17.3 2020-04-03 -20 Ingols… -2.62 0 0 0 0 -11.8
## 6 2020-15 0 12.2 0 5.23 22.4 2020-04-10 -18 Ingols… -0.964 0 0 0 0 -16.4
## 7 2020-16 3.8 10.9 0 8.23 23.6 2020-04-17 -19 Ingols… -1.24 0 0 0 0 -12.0
## 8 2020-17 0 12.9 0 14.7 21 2020-04-24 -17 Ingols… -2.93 0 0 0 0 -9.11
## 9 2020-18 18.6 11.3 0 10.3 20.1 2020-05-01 -13 Ingols… 0 0 -0.929 -0.696 -0.232 -6.96
## 10 2020-19 2.3 13.3 0 6.73 25.8 2020-05-08 -9 Ingols… -0.161 0 0 -0.161 0 -5.73
## # … with 97 more rows, 39 more variables: `Patchy light drizzle` <dbl>, `Patchy light rain` <dbl>, Sunny <dbl>,
## # Cloudy <dbl>, `Heavy rain` <dbl>, `Heavy rain at times` <dbl>, `Light rain` <dbl>, `Light rain shower` <dbl>,
## # `Moderate or heavy rain shower` <dbl>, `Patchy light rain with thunder` <dbl>, `Patchy rain possible` <dbl>,
## # `Moderate rain at times` <dbl>, Overcast <dbl>, `Thundery outbreaks possible` <dbl>, `Light sleet` <dbl>,
## # `Moderate or heavy sleet` <dbl>, `Patchy heavy snow` <dbl>, `Freezing fog` <dbl>, Blizzard <dbl>,
## # `Heavy snow` <dbl>, `Light snow` <dbl>, `Moderate or heavy rain with thunder` <dbl>,
## # `Moderate or heavy snow showers` <dbl>, `Moderate or heavy snow with thunder` <dbl>, `Moderate snow` <dbl>, …
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
mob_joined_with_ingolstadt_description_longer = mob_joined_with_ingolstadt_description%>% pivot_longer(cols = all_of(type_of_weather),names_to = "description")
description_impact_overall = mob_joined_with_ingolstadt_description_longer %>%
group_by(description) %>% summarize(impact = mean(value))
plot_ly(data = description_impact_overall,x = ~description,y = ~impact,type= "bar")